// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v $
// $Author: derrickoswald $
// $Date: 2005/10/25 01:26:09 $
// $Revision: 1.9 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexer;

import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.ParserException;

import java.io.*;

/**
 * A source of characters based on an InputStream such as from a URLConnection.
 * <p>
 * Characters decoded from the stream are accumulated in {@link #mBuffer}; the
 * buffer grows as needed and is never trimmed, which is what allows
 * {@link #reset()}, {@link #unread()} and the <code>getCharacter(s)</code> /
 * <code>getString</code> accessors to revisit characters already read.
 */
public class InputStreamSource extends Source {
    /**
     * An initial buffer size. Has a default value of <code>16384</code>.
     */
    public static int BUFFER_SIZE = 16384;

    /**
     * The stream of bytes. Set to <code>null</code> when the source is closed.
     */
    protected transient InputStream mStream;

    /**
     * The character set in use.
     */
    protected String mEncoding;

    /**
     * The converter from bytes to characters. Set to <code>null</code> once
     * the end of the stream has been reached or the source is closed.
     */
    protected transient InputStreamReader mReader;

    /**
     * The characters read so far.
     */
    protected char[] mBuffer;

    /**
     * The number of valid characters in the buffer.
     */
    protected int mLevel;

    /**
     * The offset of the next character returned by read().
     */
    protected int mOffset;

    /**
     * The bookmark, or -1 if no mark has been set.
     */
    protected int mMark;

    /**
     * Create a source of characters using the default character set.
     *
     * @param stream The stream of bytes to use.
     * @throws UnsupportedEncodingException If the default character set is unsupported.
     */
    public InputStreamSource(InputStream stream) throws UnsupportedEncodingException {
        this(stream, null, BUFFER_SIZE);
    }

    /**
     * Create a source of characters.
     *
     * @param stream  The stream of bytes to use.
     * @param charset The character set used in encoding the stream.
     * @throws UnsupportedEncodingException If the character set is unsupported.
     */
    public InputStreamSource(InputStream stream, String charset)
            throws UnsupportedEncodingException {
        this(stream, charset, BUFFER_SIZE);
    }

    /**
     * Create a source of characters.
     *
     * @param stream  The stream of bytes to use. If <code>null</code>, or if it
     *                does not support mark(), it is wrapped in a {@link Stream}.
     * @param charset The character set used in encoding the stream, or
     *                <code>null</code> to use the platform default.
     * @param size    The initial character buffer size.
     * @throws UnsupportedEncodingException If the character set is unsupported.
     */
    public InputStreamSource(InputStream stream, String charset, int size)
            throws UnsupportedEncodingException {
        if (null == stream) {
            stream = new Stream(null);
        } else if (!stream.markSupported()) {
            // bug #1044707 mark()/reset() issues
            // wrap the stream so we can reset
            stream = new Stream(stream);
        }
        // else
        // just because mark is supported doesn't guarantee
        // proper reset operation; there is no call to mark
        // in this code, so if reset misbehaves there is an
        // appropriate message in setEncoding() to suggest
        // wrapping it in a Stream.
        // This was deemed better than an attempt to call
        // reset at this point just to check if we would
        // succeed later, or to call mark with an arbitrary
        // lookahead size
        mStream = stream;
        if (null == charset) {
            mReader = new InputStreamReader(stream);
            mEncoding = mReader.getEncoding();
        } else {
            mEncoding = charset;
            mReader = new InputStreamReader(stream, charset);
        }
        mBuffer = new char[size];
        mLevel = 0;
        mOffset = 0;
        mMark = -1;
    }

    //
    // Serialization support
    //

    /**
     * Serialization support. The stream and reader are transient, so the
     * remaining input is first drained into the character buffer; the read
     * position is left where it was.
     *
     * @param out Where to write this object.
     * @throws IOException If serialization has a problem.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        if (null != mStream) {
            // remember the offset, drain the input stream, restore the offset
            int offset = mOffset;
            char[] scratch = new char[4096];
            try {
                while (EOF != read(scratch)) {
                    // nothing to do; read() accumulates everything in mBuffer
                }
            } finally {
                // restore the read position even if draining failed
                mOffset = offset;
            }
        }

        out.defaultWriteObject();
    }

    /**
     * Deserialization support.
     *
     * @param in Where to read this object from.
     * @throws IOException If deserialization has a problem.
     * @throws ClassNotFoundException If a serialized class cannot be found.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        if (null != mBuffer) { // buffer is null when destroy's been called
            // pretend we're open, mStream goes null when exhausted
            mStream = new ByteArrayInputStream(new byte[0]);
        }
    }

    /**
     * Get the input stream being used.
     *
     * @return The current input stream, or <code>null</code> if the source is closed.
     */
    public InputStream getStream() {
        return (mStream);
    }

    /**
     * Get the encoding being used to convert characters.
     *
     * @return The current encoding.
     */
    public String getEncoding() {
        return (mEncoding);
    }

    /**
     * Begins reading from the source with the given character set. If the
     * current encoding is the same as the requested encoding, this method is a
     * no-op. Otherwise any subsequent characters read from this page will have
     * been decoded using the given character set.
     * <p>
     * Some magic happens here to obtain this result if characters have already
     * been consumed from this source. Since a Reader cannot be dynamically
     * altered to use a different character set, the underlying stream is reset,
     * a new reader is constructed and a comparison made of the characters read
     * so far with the newly read characters up to the current position. If a
     * difference is encountered, or some other problem occurs, an exception is
     * thrown.
     * <p>
     * An unsupported character set is detected before the stream is reset, so
     * in that case the source is left untouched. Once the stream has been
     * reset the new encoding stays in effect, even if a mismatch is reported.
     *
     * @param character_set The character set to use to convert bytes into characters.
     * @throws ParserException If a character mismatch occurs between characters already
     *                         provided and those that would have been returned had the
     *                         new character set been in effect from the beginning. An
     *                         exception is also thrown if the source is closed, the
     *                         character set is unsupported, or the underlying stream
     *                         won't put up with these shenanigans.
     */
    public void setEncoding(String character_set) throws ParserException {
        String encoding = getEncoding();
        if (encoding.equalsIgnoreCase(character_set)) {
            return;
        }

        InputStream stream = getStream();
        if (null == stream) {
            throw new ParserException("source is closed");
        }

        // Create the new decoder before touching any state: constructing the
        // reader is what rejects an unsupported character set, and failing here
        // leaves the current encoding, reader and stream position intact.
        InputStreamReader reader;
        try {
            reader = new InputStreamReader(stream, character_set);
        } catch (UnsupportedEncodingException uee) {
            throw new ParserException(uee.getMessage(), uee);
        }

        char[] buffer = mBuffer;
        int offset = mOffset;
        try {
            stream.reset();
        } catch (IOException ioe) { // bug #1044707 mark()/reset() issues
            throw new ParserException("Stream reset failed (" + ioe.getMessage()
                    + "), try wrapping it with a org.htmlparser.lexer.Stream", ioe);
        }

        mEncoding = character_set;
        mReader = reader;
        mBuffer = new char[mBuffer.length];
        mLevel = 0;
        mOffset = 0;
        mMark = -1;
        if (0 != offset) {
            // re-read everything consumed so far; a single read() may come up
            // short, so keep going until we have it all or hit end of stream
            char[] new_chars = new char[offset];
            int count = 0;
            try {
                while (count < offset) {
                    int n = read(new_chars, count, offset - count);
                    if (n <= 0) {
                        break;
                    }
                    count += n;
                }
            } catch (IOException ioe) {
                throw new ParserException(ioe.getMessage(), ioe);
            }
            if (offset != count) {
                throw new ParserException("reset stream failed");
            }
            for (int i = 0; i < offset; i++) {
                if (new_chars[i] != buffer[i]) {
                    throw new EncodingChangeException("character mismatch (new: "
                            + new_chars[i] + " [0x"
                            + Integer.toString(new_chars[i], 16) + "] != old: "
                            + buffer[i] + " [0x"
                            + Integer.toString(buffer[i], 16)
                            + "]) for encoding change from " + encoding + " to "
                            + character_set + " at character offset " + i);
                }
            }
        }
    }

    /**
     * Fetch more characters from the underlying reader. Has no effect if the
     * underlying reader has been drained. A single read is issued, so fewer
     * than <code>min</code> characters may be added.
     *
     * @param min The minimum to read.
     * @throws IOException If the underlying reader read() throws one.
     */
    protected void fill(int min) throws IOException {
        if (null == mReader) { // mReader goes null when it's been sucked dry
            return;
        }

        char[] buffer;
        int size = mBuffer.length - mLevel; // available space
        if (size < min) { // oops, better get some buffer space
            // unknown length... keep doubling
            size = mBuffer.length * 2;
            int needed = mLevel + min;
            if (size < needed) { // or satisfy min, whichever is greater
                size = needed;
            } else {
                min = size - mLevel; // read the max
            }
            buffer = new char[size];
        } else {
            buffer = mBuffer;
            min = size;
        }

        // read into the end of the 'new' buffer
        int read = mReader.read(buffer, mLevel, min);
        if (EOF == read) {
            try {
                mReader.close();
            } finally {
                // drained either way; don't try to read from it again
                mReader = null;
            }
        } else {
            if (mBuffer != buffer) { // copy the characters previously read
                System.arraycopy(mBuffer, 0, buffer, 0, mLevel);
                mBuffer = buffer;
            }
            mLevel += read;
        }
        // todo, should repeat on read shorter than original min
    }

    //
    // Reader overrides
    //

    /**
     * Does nothing. It's supposed to close the source, but use destroy()
     * instead.
     *
     * @throws IOException <em>not used</em>
     * @see #destroy
     */
    public void close() throws IOException {
    }

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @return The character read, as an integer in the range 0 to 65535 (
     *         <tt>0x00-0xffff</tt>), or {@link #EOF EOF} if the end of the
     *         stream has been reached
     * @throws IOException If an I/O error occurs.
     */
    public int read() throws IOException {
        if (mLevel - mOffset < 1) {
            if (null == mStream) throw new IOException("source is closed");
            fill(1);
            if (mOffset >= mLevel) {
                return (EOF);
            }
        }

        return (mBuffer[mOffset++]);
    }

    /**
     * Read characters into a portion of an array. This method will block until
     * some input is available, an I/O error occurs, or the end of the stream is
     * reached.
     *
     * @param cbuf Destination buffer
     * @param off  Offset at which to start storing characters
     * @param len  Maximum number of characters to read
     * @return The number of characters read, or {@link #EOF EOF} if the end of
     *         the stream has been reached
     * @throws IOException If an I/O error occurs.
     */
    public int read(char[] cbuf, int off, int len) throws IOException {
        int ret;

        if (null == mStream) throw new IOException("source is closed");
        if ((null == cbuf) || (0 > off) || (0 > len))
            throw new IOException("illegal argument read (" + ((null == cbuf) ? "null" : "cbuf")
                    + ", " + off + ", " + len + ")");
        // ask for the minimum needed to satisfy this request
        if (mLevel - mOffset < len) fill(len - (mLevel - mOffset));
        if (mOffset >= mLevel)
            ret = EOF;
        else {
            ret = Math.min(mLevel - mOffset, len);
            System.arraycopy(mBuffer, mOffset, cbuf, off, ret);
            mOffset += ret;
        }

        return (ret);
    }

    /**
     * Read characters into an array. This method will block until some input is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param cbuf Destination buffer.
     * @return The number of characters read, or {@link #EOF EOF} if the end of
     *         the stream has been reached.
     * @throws IOException If an I/O error occurs.
     */
    public int read(char[] cbuf) throws IOException {
        return (read(cbuf, 0, cbuf.length));
    }

    /**
     * Reset the source. Repositions the read point to the most recent
     * {@link #mark mark}, or to zero if no mark has been set.
     *
     * @throws IllegalStateException If the source has been closed.
     */
    public void reset() throws IllegalStateException {
        if (null == mStream) throw new IllegalStateException("source is closed");
        mOffset = (-1 != mMark) ? mMark : 0;
    }

    /**
     * Tell whether this source supports the mark() operation.
     *
     * @return <code>true</code>.
     */
    public boolean markSupported() {
        return (true);
    }

    /**
     * Mark the present position in the source. Subsequent calls to
     * {@link #reset()} will attempt to reposition the source to this point.
     *
     * @param readAheadLimit <em>Not used.</em>
     * @throws IOException If the source is closed.
     */
    public void mark(int readAheadLimit) throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        mMark = mOffset;
    }

    /**
     * Tell whether this source is ready to be read.
     *
     * @return <code>true</code> if the next read() is guaranteed not to block
     *         for input, <code>false</code> otherwise. Note that returning
     *         false does not guarantee that the next read will block.
     * @throws IOException If the source is closed.
     */
    public boolean ready() throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        return (mOffset < mLevel);
    }

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     * Skipped characters are still buffered, so they remain available to
     * {@link #reset()} and the retrieval methods. Fewer than <code>n</code>
     * characters may be skipped.
     *
     * @param n The number of characters to skip.
     * @return The number of characters actually skipped, or {@link #EOF EOF}
     *         if there were no characters left to skip.
     * @throws IllegalArgumentException If <code>n</code> is negative.
     * @throws IOException              If an I/O error occurs.
     */
    public long skip(long n) throws IOException, IllegalArgumentException {
        long ret;

        if (null == mStream) throw new IOException("source is closed");
        if (0 > n) throw new IllegalArgumentException("cannot skip backwards");

        int buffered = mLevel - mOffset;
        if (buffered < n) {
            long shortfall = n - buffered;
            // fill() works in int arithmetic on (mLevel + min); when the request
            // cannot be expressed that way, ask for the minimum instead of
            // letting a narrowing cast wrap into a negative or arbitrary value
            int request = (shortfall > (long) Integer.MAX_VALUE - mLevel) ? 1 : (int) shortfall;
            fill(request);
        }
        if (mOffset >= mLevel)
            ret = EOF;
        else {
            ret = Math.min(mLevel - mOffset, n);
            mOffset += ret;
        }

        return (ret);
    }

    //
    // Methods not in your Daddy's Reader
    //

    /**
     * Undo the read of a single character.
     *
     * @throws IOException If the source is closed or no characters have been read.
     */
    public void unread() throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        if (0 >= mOffset) throw new IOException("can't unread no characters");
        mOffset--;
    }

    /**
     * Verify that a range lies within the characters buffered so far.
     *
     * @param start  The offset of the first character.
     * @param length The number of characters.
     * @throws IOException If the range extends past the buffered characters.
     */
    private void checkReadAhead(int start, int length) throws IOException {
        // long arithmetic so that start + length cannot overflow
        if ((long) start + (long) length > mLevel)
            throw new IOException("illegal read ahead");
    }

    /**
     * Retrieve a character again.
     *
     * @param offset The offset of the character.
     * @return The character at <code>offset</code>.
     * @throws IOException If the offset is beyond the characters buffered so
     *                     far or the source is closed.
     */
    public char getCharacter(int offset) throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        checkReadAhead(offset, 1);

        return (mBuffer[offset]);
    }

    /**
     * Retrieve characters again.
     *
     * @param array  The array of characters.
     * @param offset The starting position in the array where characters are to be
     *               placed.
     * @param start  The starting position, zero based.
     * @param end    The ending position (exclusive, i.e. the character at the
     *               ending position is not included), zero based.
     * @throws IOException If the end is beyond the characters buffered so far
     *                     or the source is closed.
     */
    public void getCharacters(char[] array, int offset, int start, int end) throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        checkReadAhead(end, 0);
        System.arraycopy(mBuffer, start, array, offset, end - start);
    }

    /**
     * Retrieve a string.
     *
     * @param offset The offset of the first character.
     * @param length The number of characters to retrieve.
     * @return A string containing the <code>length</code> characters at
     *         <code>offset</code>.
     * @throws IOException If (offset + length) is beyond the characters
     *                     buffered so far or the source is closed.
     */
    public String getString(int offset, int length) throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        checkReadAhead(offset, length);

        return (new String(mBuffer, offset, length));
    }

    /**
     * Append characters already read into a <code>StringBuffer</code>.
     *
     * @param buffer The buffer to append to.
     * @param offset The offset of the first character.
     * @param length The number of characters to retrieve.
     * @throws IOException If (offset + length) is beyond the characters
     *                     buffered so far or the source is closed.
     */
    public void getCharacters(StringBuffer buffer, int offset, int length) throws IOException {
        if (null == mStream) throw new IOException("source is closed");
        checkReadAhead(offset, length);
        buffer.append(mBuffer, offset, length);
    }

    /**
     * Close the source. Once a source has been closed, further {@link #read()
     * read}, {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
     * {@link #skip skip}, {@link #unread unread}, {@link #getCharacter
     * getCharacter} or {@link #getString getString} invocations will throw an
     * exception. Closing a previously-closed source, however, has no effect.
     *
     * @throws IOException If an I/O error occurs
     */
    public void destroy() throws IOException {
        InputStreamReader reader = mReader;

        // clear all state first so the source ends up closed even if the
        // reader's close() throws
        mStream = null;
        mReader = null;
        mBuffer = null;
        mLevel = 0;
        mOffset = 0;
        mMark = -1;
        if (null != reader) reader.close();
    }

    /**
     * Get the position (in characters).
     *
     * @return The number of characters that have already been read, or
     *         {@link #EOF EOF} if the source is closed.
     */
    public int offset() {
        return ((null == mStream) ? EOF : mOffset);
    }

    /**
     * Get the number of available characters.
     *
     * @return The number of characters that can be read without blocking or
     *         zero if the source is closed.
     */
    public int available() {
        return ((null == mStream) ? 0 : (mLevel - mOffset));
    }
}
